####Dan Hopkins
####Blogs Project
####R Code October 22 2006

###condor_submit_util -i kerrytest041007.R  -f -N -n 1

##run mode: optimizing or estimating?
optimnow <- FALSE

###figure to analyze
fg <- "Kerry"

###symptom counts to try (only relevant if optimizing)
sym <- c(25, 50, 100, 150)

###fixed symptom count (only relevant if estimating)
nsy <- 15

#########automated
library(VA)

###load data
# Reads at most the first 10000 rows of the comma-separated file; the strings
# "NA", "NC" and "<NA>" are all read in as missing values.
undta <- read.csv(
  "/nfs/fs1/projects/poliblog/kerry.txt",
  header = TRUE,
  sep = ",",
  na.strings = c("NA", "NC", "<NA>"),
  nrows = 10000
)

# Numeric yyyymmdd date built from the year, month and day columns.
undta$fulldate <- undta$year * 10000 + undta$month * 100 + undta$day

# Period boundaries in the same yyyymmdd form (alternative set kept below).
breakpoints <- c(20060101, 20061001, 20061101, 20061201,
                 20070101, 20070201, 20070401)
#breakpoints <- c(20060101,20060401,20060701,20061001,20070101,20070401)

###sort data set
# Columns (not rows) are reordered, alphabetically by column name.
col.order <- order(colnames(undta))
undta4 <- undta[, col.order]

###CREATE PERMUTED CODINGS
# For each coder the joint code is the coder's .coding value where one exists;
# where .coding is missing it is that coder's .relev value plus 2.
joint.code <- function(coding, relev){
  uncoded <- coding %in% NA
  coding[uncoded] <- relev[uncoded] + 2
  coding
}

undta4$jointcodeKW <- joint.code(undta4$keneshiawashington.coding,
                                 undta4$keneshiawashington.relev)
undta4$jointcodeKC <- joint.code(undta4$kcolton.coding,
                                 undta4$kcolton.relev)
undta4$jointcodeMK <- joint.code(undta4$mknowles.coding,
                                 undta4$mknowles.relev)
undta4$jointcodeNH <- joint.code(undta4$nickhayes.coding,
                                 undta4$nickhayes.relev)
undta4$jointcodeAP <- joint.code(undta4$andrewprokop.coding,
                                 undta4$andrewprokop.relev)


###matrix of codings
# One column per coder, in the order AP, NH, KC, KW.
# NOTE(review): jointcodeMK is built above but is not part of this matrix --
# confirm that leaving this coder out is intended.
codemat <- cbind(
  undta4$jointcodeAP,
  undta4$jointcodeNH,
  undta4$jointcodeKC,
  undta4$jointcodeKW
)

###function to translate codings
# Collapses each row of a coder-by-item code matrix into a single value.
#
#   mat: numeric matrix with one row per item and one column per coder;
#        NA means the coder gave no code for that item.
#
# Returns a numeric vector with one element per row of mat. The rules are
# applied in this order, later rules overriding earlier ones:
#   * no coder gave a code            -> NA
#   * exactly one coder gave a code   -> that code
#   * any code lies in -2..2          -> mean of the codes in -2..2
#                                        (codes outside that set are ignored)
#   * every code given is above 2     -> the smallest code
# A row matching none of the rules (several codes, none in -2..2, smallest
# not above 2) yields NA.
conv.func <- function(mat){
  valid.codes <- c(-2,-1,0,1,2)
  n <- nrow(mat)
  # Preallocated so that a zero-row matrix gives a zero-length result.
  vec <- rep(NA_real_, n)

  for(i in seq_len(n)){
    # Use the argument, not a global matrix, so the function works on any input.
    cd <- mat[i,]
    given <- cd[!is.na(cd)]

    # All coders missing: stay NA whatever the number of coder columns is
    # (and avoid calling min() on an empty set, which would give Inf).
    if(length(given)==0)
      next

    # Reset per row so that no value leaks in from the previous row.
    res <- NA
    if(length(given)==1)
      res <- given

    if(any(valid.codes %in% cd))
      res <- mean(cd[cd %in% valid.codes])

    if(min(given)>2)
      res <- min(given)

    vec[i] <- res
  }

  vec
}

# One combined code per row, computed from the coder matrix; any Inf that
# comes back is turned into a missing value.
undta4$code1 <- conv.func(codemat)
undta4$code1[undta4$code1 %in% Inf] <- NA
#########

###RANDOMLY BREAK TIES
# Values sitting exactly on a bin edge (-1.5, -.5, .5, 1.5) are shifted by
# -.01 or +.01 at random, so the binning below puts them on one side.
aa <- c(-.01, .01)
tied <- undta4$code1 %in% c(-1.5, -.5, .5, 1.5)
ss <- sample(aa, sum(tied), replace = TRUE)
undta4$code1[tied] <- undta4$code1[tied] + ss

# Bin code1 into code2: values up to 2.5 go to the integer categories -2..2,
# and values above 2 are carried over unchanged.
undta4$code2 <- NA
score <- undta4$code1
seen <- !(score %in% NA)
code2 <- undta4$code2

code2[score <= -1.5 & seen] <- -2
code2[score > -1.5 & score <= -.5 & seen] <- -1
code2[score > -.5 & score <= .5 & seen] <- 0
code2[score > .5 & score <= 1.5 & seen] <- 1
code2[score > 1.5 & score < 2.5 & seen] <- 2

above <- score > 2 & seen
code2[above] <- score[above]

undta4$code2 <- code2

####create new codes
# For each date period, estimates category proportions with va(), using the
# rows that have a jointcode as the training set and the rows falling in the
# period as the test set. The 0/1 WORD* indicator columns are the symptoms.
# Results are saved as resmat (one row per period), nvec (test-set size per
# period) and date.cat (the period boundaries).

undta5 <- undta4[,order(colnames(undta4))]

# Columns whose name starts with "WORD". They are selected by name rather than
# as the range st:fn, so a column that happens to sort in between them is not
# picked up by accident.
wds <- substr(colnames(undta5),start=1,stop=4)
word.cols <- which(wds=="WORD")
if(length(word.cols)==0)
  stop("no columns starting with 'WORD' found in the data", call.=FALSE)
st <- min(word.cols)
fn <- max(word.cols)
nwords <- length(word.cols)

# Period boundaries. These are plain numbers compared against the numeric
# yyyymmdd values in undta$fulldate, so entries such as 20060931 only act as
# cut points between months.
date.cat <-
  c(min(undta$fulldate,na.rm=TRUE),20060931,20061031,20061131,20061231,20070131,20070301)

nperiod <- length(date.cat)-1
# include.lowest=TRUE keeps the rows dated exactly at the first boundary (the
# earliest date in the data) in period 1; without it cut() leaves them out.
period <- cut(undta$fulldate,breaks=date.cat,labels=seq_len(nperiod),include.lowest=TRUE)

# 0/1 indicators: is each WORD column greater than zero?
dta.BUSH <- as.data.frame(1*(undta5[,word.cols,drop=FALSE]>0))
dta.BUSH$jointcode <- undta5$code2
code.col <- ncol(dta.BUSH)  # position of jointcode (always the last column)

sttxt <- colnames(dta.BUSH)[1]
fntxt <- colnames(dta.BUSH)[nwords]

# Share of rows in which each word occurs; words occurring in fewer than 1% or
# more than 99% of rows get weight 0, the remaining weights sum to 1.
nn <- colSums(dta.BUSH[,seq_len(nwords),drop=FALSE])/nrow(dta.BUSH)
nn2 <- nn
nn2[nn<.01 | nn>.99] <- 0
wts <- nn2/sum(nn2)

nvec <- integer(nperiod)
# NOTE(review): 7 columns assumes va() returns 7 category estimates -- confirm.
resmat <- matrix(NA,nperiod,7)

has.code <- !is.na(dta.BUSH$jointcode)
train <- dta.BUSH[has.code,]
period.train <- period[has.code]

# The training set is the same for every period, so its zero-variance columns
# are found once, outside the loop.
v1 <- vapply(train,sd,numeric(1))
remov1 <- which(v1==0)

for(zzz in seq_len(nperiod)){
  test <- dta.BUSH[period==zzz & !is.na(period),]
  n.test <- nrow(test)
  nvec[zzz] <- n.test
  if(n.test==0){
    warning("period ",zzz," has no rows; its row of resmat is left as NA",call.=FALSE)
    next
  }

  # Placeholder codes for the test set, drawn at random.
  test$jointcode <- rbinom(n.test,size=2,prob=.5)

  v2 <- vapply(test,sd,numeric(1))
  remov2 <- which(v2==0)

  # Drop columns that are constant in either set, but never jointcode itself.
  # Columns are chosen by positive index: with nothing to drop, x[,-integer(0)]
  # would select no columns at all.
  drop.cols <- setdiff(union(remov1,remov2),code.col)
  keep.cols <- setdiff(seq_len(ncol(train)),drop.cols)
  keep.words <- setdiff(keep.cols,code.col)
  if(length(keep.words)==0){
    warning("period ",zzz," has no usable word columns; its row of resmat is left as NA",call.=FALSE)
    next
  }

  train2 <- train[,keep.cols,drop=FALSE]
  test22 <- test[,keep.cols,drop=FALSE]
  wts2 <- wts[keep.words]

  # The formula names the first and last word columns that are still present
  # after the removal above.
  first.word <- colnames(train2)[1]
  last.word <- colnames(train2)[length(keep.words)]

  # NOTE(review): the call is still built as text because the formula contains
  # a literal "..." between the two column names; confirm against the VA
  # documentation before replacing eval(parse()) with a formula object.
  txt <- paste0("vout1<-va(cbind(",first.word,"+...+",last.word,")~jointcode,data=list(train2,test22),nsymp=nsy,n.subset=200,prob.wt=wts2)")
  eval(parse(text=txt))
  resmat[zzz,] <- vout1$est.CSMF
}
save(resmat,nvec,date.cat,file="/nfs/fs1/home/D/dhopkins/condor/kerry12.Rdata")


